import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
from matplotlib import pylab
import yaml
import os
warnings.filterwarnings('ignore')
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=100, facecolor='white')
pylab.rcParams['figure.figsize'] = (9, 9)
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5
OXPHOG = ["MT-ND1","MT-ND2","MT-ND3","MT-ND4","MT-ND4","MT-ND5","MT-ND6","MT-CYB","MT-CO1","MT-CO2","MT-CO3","MT-ATP6","MT-ATP8","MT-RNR2"]
hostRoot = "-".join(socket.gethostname().split('-')[0:2])
#indir=paths["paths"]["indir"][hostRoot]
outdir="./outdir"
dataDir = "./data"
colorMapPath = "./colorMap.yaml"
with open(colorMapPath, 'r') as f:
colorMap = yaml.load(f, Loader=yaml.FullLoader)["uns_colors"]
control3_piece2 = sc.read_10x_h5(dataDir+"/control3_piece2/count/sample_feature_bc_matrix.h5")
control1_piece1 = sc.read_10x_h5(dataDir+"/control1_piece1/count/sample_feature_bc_matrix.h5")
control2_piece2 = sc.read_10x_h5(dataDir+"/control2_piece2/count/sample_feature_bc_matrix.h5")
control3_piece1 = sc.read_10x_h5(dataDir+"/control3_piece1/count/sample_feature_bc_matrix.h5")
control1_piece3 = sc.read_10x_h5(dataDir+"/control1_piece3/count/sample_feature_bc_matrix.h5")
control1_piece2 = sc.read_10x_h5(dataDir+"/control1_piece2/count/sample_feature_bc_matrix.h5")
control2_piece3= sc.read_10x_h5(dataDir+"/control2_piece3/count/sample_feature_bc_matrix.h5")
control3_piece3= sc.read_10x_h5(dataDir+"/control3_piece3/count/sample_feature_bc_matrix.h5")
control2_piece1 = sc.read_10x_h5(dataDir+"/control2_piece1/count/sample_feature_bc_matrix.h5")
polaroid2_medial = sc.read_10x_h5(dataDir+"/polaroid2_medial/count/sample_feature_bc_matrix.h5")
polaroid3_medial = sc.read_10x_h5(dataDir+"/polaroid3_medial/count/sample_feature_bc_matrix.h5")
polaroid3_distal = sc.read_10x_h5(dataDir+"/polaroid3_distal/count/sample_feature_bc_matrix.h5")
polaroid1_distal = sc.read_10x_h5(dataDir+"/polaroid1_distal/count/sample_feature_bc_matrix.h5")
polaroid2_distal = sc.read_10x_h5(dataDir+"/polaroid2_distal/count/sample_feature_bc_matrix.h5")
polaroid1_medial = sc.read_10x_h5(dataDir+"/polaroid1_medial/count/sample_feature_bc_matrix.h5")
polaroid1_proximal= sc.read_10x_h5(dataDir+"/polaroid1_proximal/count/sample_feature_bc_matrix.h5")
polaroid2_proximal= sc.read_10x_h5(dataDir+"/polaroid2_proximal/count/sample_feature_bc_matrix.h5")
polaroid3_proximal = sc.read_10x_h5(dataDir+"/polaroid3_proximal/count/sample_feature_bc_matrix.h5")
reading ./data/control3_piece2/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control1_piece1/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control2_piece2/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control3_piece1/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control1_piece3/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control1_piece2/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control2_piece3/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control3_piece3/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/control2_piece1/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid2_medial/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid3_medial/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid3_distal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid1_distal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid2_distal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid1_medial/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid1_proximal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid2_proximal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading ./data/polaroid3_proximal/count/sample_feature_bc_matrix.h5
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
(0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
adDict = {"control3_piece2":control3_piece2,"control1_piece1":control1_piece1,"control2_piece2":control2_piece2,"control3_piece1":control3_piece1,"control1_piece3":control1_piece3,"control1_piece2":control1_piece2,"control2_piece3":control2_piece3,"control3_piece3":control3_piece3,"control2_piece1":control2_piece1,"polaroid3_medial":polaroid3_medial,"polaroid2_medial":polaroid2_medial,"polaroid3_distal":polaroid3_distal,"polaroid1_distal":polaroid1_distal,"polaroid2_distal":polaroid2_distal,"polaroid1_medial":polaroid1_medial,"polaroid1_proximal":polaroid1_proximal,"polaroid2_proximal":polaroid2_proximal,"polaroid3_proximal":polaroid3_proximal}
for adata in list(adDict.keys()):
adDict[adata]
adDict[adata].obs["dataset"] = adata
adDict[adata].obs["organoid"] = adata.split("_")[0]
adDict[adata].obs["region"] = adata.split("_")[1]
adDict[adata].obs_names = adDict[adata].obs_names + "_" +adata
adDict[adata].var_names_make_unique()
adata = ad.concat(list(adDict.values()), join="outer")
adata.obs["type"] = adata.obs["organoid"].str[:-1]
adata.obs["type"].value_counts()
adata.obs["type_region"] = adata.obs["type"].astype("str") + "_" +adata.obs["region"].astype("str")
adata.obs["regionContrast"] = adata.obs["region"]
adata.obs["regionContrast"] = adata.obs["regionContrast"].replace({"piece2":"control","piece1":"control","piece3":"control"}).astype("category")
adata.obs["dataset"] = adata.obs["dataset"].astype("category")
adata.obs["organoid"] = adata.obs["organoid"].astype("category")
adata.obs["region"] = adata.obs["region"].astype("category")
adata.obs["type"] = adata.obs["type"].astype("category")
adata.obs["type_region"] = adata.obs["type_region"].astype("category")
with open(colorMapPath, 'r') as f:
colorMap = yaml.load(f, Loader=yaml.FullLoader)["uns_colors"]
colorMap
adata.uns["regionContrast_colors"] = [colorMap[i]["color"] for i in adata.obs["regionContrast"].cat.categories]
adata.uns["region_colors"] = [colorMap[i]["color"] for i in adata.obs["region"].cat.categories]
adata.uns["type_colors"] = [colorMap[i]["color"] for i in adata.obs["type"].cat.categories]
#adata.uns["regionContrast_colors"] = [colorMap[i]["color"] for i in adata.obs["regionContrast"].cat.categories]
sc.pl.highest_expr_genes(adata, n_top=20, )
normalizing counts per cell
finished (0:00:00)
adata.var['mt'] = adata.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'
adata.var['ribo'] = adata.var_names.str.startswith('RP') # annotate the group of ribosomal genes as 'ribo'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=True, inplace=True)
sc.pp.calculate_qc_metrics(adata, qc_vars=['ribo'], percent_top=None, log1p=True, inplace=True)
sc.pl.violin(adata, ['pct_counts_mt','pct_counts_ribo'],
groupby= "dataset", jitter=0.4, multi_panel=True, rotation=90)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'],
groupby= "dataset", jitter=0.4, multi_panel=True, rotation=90)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'],
groupby= "organoid", jitter=0.4, multi_panel=True, rotation=90)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'],
groupby= "region", jitter=0.4, multi_panel=True, rotation=90)
adata.write_h5ad(outdir+"/1_polaroid_mint.h5ad")